#!/usr/bin/env python3
"""
Standardize lens CSV columns for T3 so pre-stack uses the frozen size bins.

- Ensures RG_kpc exists (mirrors R_G_kpc if needed)
- Ensures RG_bin exists (mirrors R_G_bin or recomputes from RG_kpc)
- Ensures Mstar_bin exists when Mstar_log10 is present (binned from Mstar_log10)
- Applies to data/lenses_true.csv -> data/lenses.csv (active lenses)
- If data/lenses_random.csv exists, standardizes it in-place too
"""

import os
import pandas as pd
import numpy as np

# Frozen bin edges. _ensure_cols passes them to pd.cut(..., right=False), so
# each pair of adjacent edges forms a half-open [lo, hi) bin; values below the
# first edge or at/above the last edge fall in no bin.
RG_EDGES = [1.5, 3.0, 5.0, 8.0, 12.0]       # kpc (frozen)
MS_EDGES = [10.2, 10.5, 10.8, 11.1]         # log10(M/Msun) (frozen)

def _interval_labels(values: pd.Series, edges: list) -> pd.Series:
    """Label each value with the left-closed ``[lo, hi)`` interval it falls in.

    ``values`` is coerced to numeric first. Entries that are missing,
    non-numeric, or outside ``edges`` stay missing (NaN) rather than becoming
    the literal string "nan".
    """
    binned = pd.cut(pd.to_numeric(values, errors="coerce"), edges, right=False)
    # astype(str) on its own can turn NaN into the text "nan", which then looks
    # like a real bin label; mask those rows back to missing.
    return binned.astype(str).where(binned.notna())


def _ensure_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with the standardized lens columns filled in.

    Columns that already exist are never overwritten; the input frame is not
    modified.

    - ``RG_kpc``: numeric copy of ``R_G_kpc`` when that column exists,
      otherwise an all-NaN column.
    - ``RG_bin``: string copy of ``R_G_bin`` when that column exists,
      otherwise the ``RG_EDGES`` interval that contains ``RG_kpc``.
    - ``Mstar_bin``: the ``MS_EDGES`` interval that contains ``Mstar_log10``;
      not created when ``Mstar_log10`` is absent.

    Rows that have no bin (missing, non-numeric, or out-of-range values) are
    left missing in the bin columns instead of holding the string "nan".
    """
    df = df.copy()

    # RG_kpc: copy from R_G_kpc if needed
    if "RG_kpc" not in df.columns:
        if "R_G_kpc" in df.columns:
            df["RG_kpc"] = pd.to_numeric(df["R_G_kpc"], errors="coerce")
        else:
            # if neither present, leave NaN; pre-stack will warn
            df["RG_kpc"] = np.nan

    # RG_bin: prefer existing; else mirror R_G_bin; else compute from RG_kpc
    if "RG_bin" not in df.columns:
        if "R_G_bin" in df.columns:
            legacy_bin = df["R_G_bin"]
            # Keep missing labels missing while stringifying the rest.
            df["RG_bin"] = legacy_bin.astype(str).where(legacy_bin.notna())
        else:
            df["RG_bin"] = _interval_labels(df["RG_kpc"], RG_EDGES)

    # Mstar_bin: compute if missing (only possible when Mstar_log10 exists)
    if "Mstar_bin" not in df.columns and "Mstar_log10" in df.columns:
        df["Mstar_bin"] = _interval_labels(df["Mstar_log10"], MS_EDGES)

    return df

def main():
    """Standardize the active lens catalogue and, if present, the randoms.

    Reads ``data/lenses_true.csv``, fills in the standardized columns via
    ``_ensure_cols`` and writes the result to ``data/lenses.csv``. When
    ``data/lenses_random.csv`` exists it is standardized the same way and
    overwritten in place.

    Raises:
        SystemExit: if ``data/lenses_true.csv`` does not exist.
    """
    lenses_true = "data/lenses_true.csv"
    lenses = "data/lenses.csv"
    randoms = "data/lenses_random.csv"

    # Nothing can be standardized without the true-size catalogue.
    if not os.path.exists(lenses_true):
        raise SystemExit(f"Missing {lenses_true}. Run extract_true_sizes_from_tiles.py first.")

    # Active lenses: lenses_true.csv -> lenses.csv
    dfL = _ensure_cols(pd.read_csv(lenses_true))
    dfL.to_csv(lenses, index=False)
    print(f"[OK] Wrote standardized lenses to {lenses} (rows={len(dfL):,})")

    # Randoms are optional; bail out early when they have not been generated.
    if not os.path.exists(randoms):
        print("[INFO] No lenses_random.csv yet; generate it after this.")
        return

    # Randoms: rewrite the same file with the standardized columns.
    dfR = _ensure_cols(pd.read_csv(randoms))
    dfR.to_csv(randoms, index=False)
    print(f"[OK] Updated {randoms} in place (rows={len(dfR):,})")

# Run the standardization only when executed as a script, not on import.
if __name__ == "__main__":
    main()
